1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)

op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material and Methods

About Dataset Context The principal diagnosis method for SARS-CoV-2 is a PCR technique, which allows for the detection of a genetic material of a pathogen or microorganism and it has high specificity, sensitivity and helps diagnosing even in the first stages of infection. However, it is not the fastest method to use in this situation and it’s very time consuming.

Raman spectroscopy could be used as a cheap and quick method to diagnose infection by SARS-CoV-2.

Content Inside this dataset there’re three files, each containing the raw spectra of healthy and infected subjects, unknown-state subjects and the probe used with saline solution inside. The data was in MATLAB’s proprietary format and was transformed to CSV using Python’s Pandas library.

Source The source of this data was found in the following DOI: https://doi.org/10.6084/m9.figshare.12159924.v1

Acknowledgements If used cite: Yin, Gang; Li, Lintao; Lu, Shun; Yin, Yu; Su, Yuanzhang; Zeng, Yilan; et al. (2020): Data and code on serum Raman spectroscopy as an efficient primary screening of coronavirus disease in 2019 (COVID-19). figshare. Dataset. https://doi.org/10.6084/m9.figshare.12159924.v1

1.2 Data: The COVID_19 Data-Set

The data to process is described in:

https://www.kaggle.com/datasets/sfran96/raman-spectroscopy-for-detecting-covid19?select=covid_and_healthy_spectra.csv

I added a column to the data identifying the repeated experiments.


SalivaIR <- read.csv("~/GitHub/LatentBiomarkers/Data/RAMANCOVID19/covid_and_healthy_spectra.csv")
SalivaIR$X400 <- NULL
SalivaIR$RepID <- rep(c(1:3),103) 
SalivaIR_set1 <- subset(SalivaIR,RepID==1)
SalivaIR_set1$RepID <- NULL
diagnos <- SalivaIR_set1$diagnostic
SalivaIR_set1$diagnostic <- NULL

SalivaIR_set2 <- subset(SalivaIR,RepID==2)
SalivaIR_set2$RepID <- NULL
SalivaIR_set2$diagnostic <- NULL

SalivaIR_set3 <- subset(SalivaIR,RepID==3)
SalivaIR_set3$RepID <- NULL
SalivaIR_set3$diagnostic <- NULL

SalivaIR_Avg <- (SalivaIR_set1 + SalivaIR_set2 + SalivaIR_set3)/3

SalivaIR_Avg$class <- 1*(diagnos=="SARS-CoV-2")

pander::pander(table(SalivaIR_Avg$class))
0 1
50 53

1.2.0.1 Standarize the names for the reporting

studyName <- "IRSaliva_2"
dataframe <- SalivaIR_Avg
outcome <- "class"

TopVariables <- 10

thro <- 0.80
cexheat = 0.15

1.3 Generaring the report

1.3.1 Libraries

Some libraries

library(psych)
library(whitening)
library("vioplot")
library("rpart")

1.3.2 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
103 899
pander::pander(table(dataframe[,outcome]))
0 1
50 53

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

largeSet <- length(varlist) > 1500 

1.3.3 Scaling the data

Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns


  ### Some global cleaning
  sdiszero <- apply(dataframe,2,sd) > 1.0e-16
  dataframe <- dataframe[,sdiszero]

  varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
  tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
  dataframe <- dataframe[,tokeep]

  varlist <- colnames(dataframe)
  varlist <- varlist[varlist != outcome]
  
  iscontinous <- sapply(apply(dataframe,2,unique),length) >= 5 ## Only variables with enough samples



dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData

1.4 The heatmap of the data

numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000


if (!largeSet)
{

  hm <- heatMaps(data=dataframeScaled[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 xlab="Feature",
                 ylab="Sample",
                 srtCol=45,
                 srtRow=45,
                 cexCol=cexheat,
                 cexRow=cexheat
                 )
  par(op)
}

1.4.0.1 Correlation Matrix of the Data

The heat map of the data


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  #cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
  cormat <- cor(dataframe[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.9991348

1.5 The decorrelation


DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#> 
#>  Included: 890 , Uni p: 0.02782805 , Uncorrelated Base: 4 , Outcome-Driven Size: 0 , Base Size: 4 
#> 
#> 
 1 <R=0.999,thr=0.900,N=  836>, Top: 26( 140 )[ 1 : 26 Fa= 26 : 0.900 ]( 26 , 360 , 0 ),<|>Tot Used: 386 , Added: 360 , Zero Std: 0 , Max Cor: 0.999
#> 
 2 <R=0.999,thr=0.900,N=  836>, Top: 34( 21 )[ 1 : 34 Fa= 60 : 0.900 ]( 34 , 255 , 26 ),<|>Tot Used: 558 , Added: 255 , Zero Std: 0 , Max Cor: 0.995
#> 
 3 <R=0.995,thr=0.900,N=  836>, Top: 48( 29 )[ 1 : 48 Fa= 107 : 0.900 ]( 48 , 188 , 60 ),<|>Tot Used: 699 , Added: 188 , Zero Std: 0 , Max Cor: 0.988
#> 
 4 <R=0.988,thr=0.900,N=  836>, Top: 34( 2 )[ 1 : 34 Fa= 140 : 0.900 ]( 34 , 87 , 107 ),<|>Tot Used: 781 , Added: 87 , Zero Std: 0 , Max Cor: 0.967
#> 
 5 <R=0.967,thr=0.900,N=  836>, Top: 15( 2 )[ 1 : 15 Fa= 153 : 0.900 ]( 15 , 20 , 140 ),<|>Tot Used: 804 , Added: 20 , Zero Std: 0 , Max Cor: 0.905
#> 
 6 <R=0.905,thr=0.900,N=  836>, Top: 1( 1 )[ 1 : 1 Fa= 153 : 0.900 ]( 1 , 1 , 153 ),<|>Tot Used: 804 , Added: 1 , Zero Std: 0 , Max Cor: 0.900
#> 
 7 <R=0.900,thr=0.800,N=  438>, Top: 117( 2 ).=[ 2 : 117 Fa= 224 : 0.842 ]( 112 , 212 , 153 ),<|>Tot Used: 842 , Added: 212 , Zero Std: 0 , Max Cor: 0.956
#> 
 8 <R=0.956,thr=0.900,N=    2>, Top: 1( 1 )[ 1 : 1 Fa= 224 : 0.900 ]( 1 , 1 , 224 ),<|>Tot Used: 842 , Added: 1 , Zero Std: 0 , Max Cor: 0.900
#> 
 9 <R=0.900,thr=0.800,N=   96>, Top: 28( 1 )[ 1 : 28 Fa= 233 : 0.800 ]( 27 , 50 , 224 ),<|>Tot Used: 862 , Added: 50 , Zero Std: 0 , Max Cor: 0.893
#> 
 10 <R=0.893,thr=0.800,N=   96>, Top: 7( 2 )[ 1 : 7 Fa= 235 : 0.800 ]( 7 , 11 , 233 ),<|>Tot Used: 868 , Added: 11 , Zero Std: 0 , Max Cor: 0.800
#> 
 11 <R=0.800,thr=0.800,N=   96>
#> 
 [ 11 ], 0.7999365 Decor Dimension: 868 Nused: 868 . Cor to Base: 377 , ABase: 1 , Outcome Base: 0 
#> 
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]

pander::pander(sum(apply(dataframe[,varlist],2,var)))

0.0708

pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))

0.00306

pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))

4.18

pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))

3.8

1.5.1 The decorrelation matrix


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  
  UPSTM <- attr(DEdataframe,"UPSTM")
  
  gplots::heatmap.2(1.0*(abs(UPSTM)>0),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Decorrelation matrix",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="|Beta|>0",
                    xlab="Output Feature", ylab="Input Feature")
  
  par(op)
}

1.6 The heatmap of the decorrelated data

if (!largeSet)
{

  hm <- heatMaps(data=DEdataframe[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 cexRow = cexheat,
                 cexCol = cexheat,
                 srtCol=45,
                 srtRow=45,
                 xlab="Feature",
                 ylab="Sample")
  par(op)
}

1.7 The correlation matrix after decorrelation

if (!largeSet)
{

  cormat <- cor(DEdataframe[,varlistc],method="pearson")
  cormat[is.na(cormat)] <- 0
  
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Correlation after IDeA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  
  par(op)
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.7999365

1.8 U-MAP Visualization of features

1.8.1 The UMAP based on LASSO on Raw Data


if (nrow(dataframe) < 1000)
{
  classes <- unique(dataframe[1:numsub,outcome])
  raincolors <- rainbow(length(classes))
  names(raincolors) <- classes
  datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
  text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}

1.8.2 The decorralted UMAP

if (nrow(dataframe) < 1000)
{

  datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
  text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}

1.9 Univariate Analysis

1.9.1 Univariate



univarRAW <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               dataframe,
               rankingTest="AUC")

100 : X641 200 : X869 300 : X1088 400 : X1293 500 : X1486
600 : X1667 700 : X1833 800 : X1985




univarDe <- uniRankVar(varlistc,
               paste(outcome,"~1"),
               outcome,
               DEdataframe,
               rankingTest="AUC",
               )

100 : La_X641 200 : La_X869 300 : La_X1088 400 : La_X1293 500 : La_X1486
600 : La_X1667 700 : X1833 800 : La_X1985

1.9.2 Final Table


univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")

##top variables
topvar <- c(1:length(varlist)) <= TopVariables
tableRaw <- univarRAW$orderframe[topvar,univariate_columns]
pander::pander(tableRaw)
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
X2078 -0.00831 0.000859 -0.00645 0.000455 0.990 0.944
X2045 -0.01737 0.000858 -0.01574 0.000713 0.388 0.937
X2080 -0.00870 0.000957 -0.00671 0.000464 0.985 0.936
X2086 -0.01137 0.001083 -0.00908 0.000517 0.783 0.935
X2046 -0.01814 0.000941 -0.01636 0.000604 0.882 0.935
X2077 -0.00826 0.000843 -0.00674 0.000458 0.959 0.934
X2085 -0.01060 0.001052 -0.00825 0.000465 0.799 0.934
X2088 -0.01167 0.000953 -0.00970 0.000500 0.898 0.932
X2048 -0.01880 0.000968 -0.01698 0.000660 0.672 0.928
X2049 -0.01907 0.001017 -0.01718 0.000782 0.868 0.926


topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]


pander::pander(finalTable)
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
La_X515 0.003190 0.001213 0.000713 0.001227 0.524 0.938
X2048 -0.018797 0.000968 -0.016978 0.000660 0.672 0.928
X2082 -0.009254 0.001368 -0.007293 0.000440 0.909 0.918
La_X1163 0.012026 0.002648 0.007166 0.001884 0.841 0.918
La_X524 0.000851 0.001333 -0.001629 0.001054 0.530 0.916
La_X967 0.005795 0.003925 -0.001383 0.002809 0.919 0.915
La_X601 -0.008575 0.002024 -0.005295 0.001687 0.841 0.897
La_X837 0.011911 0.004703 0.004587 0.003576 0.975 0.895
La_X553 0.000555 0.000738 0.001866 0.000698 0.697 0.893
La_X1718 0.003177 0.001105 0.001549 0.000738 0.595 0.892

dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")


pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
mean total fraction
2.46 820 0.921

theCharformulas <- attr(dc,"LatentCharFormulas")


finalTable <- rbind(finalTable,tableRaw[topvar[!(topvar %in% topLAvar)],univariate_columns])


orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- theCharformulas[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")

finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
  DecorFormula caseMean caseStd controlMean controlStd controlKSP ROCAUC RAWAUC fscores
X2078 NA -0.008314 0.000859 -0.006451 0.000455 0.990 0.944 0.944 NA
La_X515 - (0.964)X512 + X515 - (0.029)X615 0.003190 0.001213 0.000713 0.001227 0.524 0.938 0.666 -2
X2045 NA -0.017370 0.000858 -0.015739 0.000713 0.388 0.937 0.937 NA
X2080 NA -0.008703 0.000957 -0.006708 0.000464 0.985 0.936 0.936 NA
X2086 NA -0.011371 0.001083 -0.009082 0.000517 0.783 0.935 0.935 NA
X2046 NA -0.018144 0.000941 -0.016363 0.000604 0.882 0.935 0.935 NA
X2077 NA -0.008257 0.000843 -0.006736 0.000458 0.959 0.934 0.934 NA
X2085 NA -0.010600 0.001052 -0.008254 0.000465 0.799 0.934 0.934 NA
X2088 NA -0.011674 0.000953 -0.009700 0.000500 0.898 0.932 0.932 NA
X2048 NA -0.018797 0.000968 -0.016978 0.000660 0.672 0.928 0.928 13
X20481 NA -0.018797 0.000968 -0.016978 0.000660 0.672 0.928 NA NA
X2049 NA -0.019069 0.001017 -0.017178 0.000782 0.868 0.926 0.926 NA
X2082 NA -0.009254 0.001368 -0.007293 0.000440 0.909 0.918 0.918 NA
La_X1163 + X1163 - (0.900)X1167 0.012026 0.002648 0.007166 0.001884 0.841 0.918 0.592 0
La_X524 + X524 - (0.966)X527 - (0.046)X615 0.000851 0.001333 -0.001629 0.001054 0.530 0.916 0.655 -2
La_X967 + X967 - (0.446)X1328 0.005795 0.003925 -0.001383 0.002809 0.919 0.915 0.657 3
La_X601 + X601 - (0.966)X615 -0.008575 0.002024 -0.005295 0.001687 0.841 0.897 0.552 5
La_X837 - (1.165)X795 + X837 - (1.017)X1328 0.011911 0.004703 0.004587 0.003576 0.975 0.895 0.643 -2
La_X553 + X553 - (1.030)X555 + (0.046)X615 0.000555 0.000738 0.001866 0.000698 0.697 0.893 0.575 -2
La_X1718 - (1.014)X1698 + X1718 0.003177 0.001105 0.001549 0.000738 0.595 0.892 0.546 -1

1.10 Comparing IDeA vs PCA vs EFA

1.10.1 PCA

featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,scale. = TRUE)   #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous]) 
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

#pander::pander(pc$rotation)


PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])


  gplots::heatmap.2(abs(PCACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "PCA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.10.2 EFA


EFAdataframe <- dataframeScaled

if (length(iscontinous) < 2000)
{
  topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
  if (topred < 2) topred <- 2
  
  uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE)  # EFA analysis
  predEFA <- predict(uls,dataframeScaled[,iscontinous])
  EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous]) 


  
  EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
  
  
    gplots::heatmap.2(abs(EFACor),
                      trace = "none",
    #                  scale = "row",
                      mar = c(5,5),
                      col=rev(heat.colors(5)),
                      main = "EFA Correlation",
                      cexRow = 0.5,
                      cexCol = 0.5,
                       srtCol=45,
                       srtRow= -45,
                      key.title=NA,
                      key.xlab="Pearson Correlation",
                      xlab="Feature", ylab="Feature")
}

1.11 Effect on CAR modeling

par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(rawmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
  }


pander::pander(table(dataframe[,outcome],pr))
  0 1
0 50 0
1 6 47
pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.942 0.878 0.978
3 se 0.887 0.770 0.957
4 sp 1.000 0.929 1.000
6 diag.or Inf NA Inf

par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(IDeAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
  }

pander::pander(table(DEdataframe[,outcome],pr))
  0 1
0 46 4
1 1 52
pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.951 0.890 0.984
3 se 0.981 0.899 1.000
4 sp 0.920 0.808 0.978
6 diag.or 598.000 64.500 5544.261

par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
  plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
  text(PCAmodel, use.n = TRUE,cex=0.75)
  ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
  0 1
0 44 6
1 4 49
pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.903 0.829 0.952
3 se 0.925 0.818 0.979
4 sp 0.880 0.757 0.955
6 diag.or 89.833 23.782 339.333


par(op)

1.11.1 EFA


  EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
  EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
  pr <- predict(EFAmodel,EFAdataframe,type = "class")
  
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(EFAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
  }


  pander::pander(table(EFAdataframe[,outcome],pr))
  0 1
0 46 4
1 3 50
  pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.932 0.865 0.972
3 se 0.943 0.843 0.988
4 sp 0.920 0.808 0.978
6 diag.or 191.667 40.698 902.650
  par(op)